
global root_dir = "`1'"

include "$root_dir/code/config/config.do"


cap noi log using ${log_dir}/5_map_ipc4_sic_make.log, replace name(dat)

*Handle empty arguments
global arg1 = cond("`2'" == "___EMPTY___", "", "`2'")
global arg2 = cond("`3'" == "___EMPTY___", "", "`3'")
global arg3 = cond("`4'" == "___EMPTY___", "", "`4'")
global arg4 = cond("`5'" == "___EMPTY___", "", "`5'")

if "$arg1" != "" {
    global weight_category "$arg1"
    di "Weight category: ${weight_category}"
}

if "$arg2" != "" {
    global weight_versions "$arg2"
    di "Weight versions: ${weight_versions}"
}

if "$arg3" != "" {
    global weight_window "$arg3"
    di "Weight window: ${weight_window}"
}

if "$arg4" != "" {
	global wtype "$arg4"
}
di "${wtype}"
capture noi {



* 5_

/* This do-file repeats the previous step with a different mapping from IPC4 into the industry of manufcaturing, 
skipping the inptut/output table. We refer to this as the "DD" approach. 

Inputs:
- ind90_sic.dta, by hand, based on U.S. Census Bureau. 2003. "The Relationship Between The 1990 Census and Census 2000 Industry and Occupation Classification Systems." Technical Paper #65, U.S. Department of Commerce.
- ipc4_sic4_of_mfg_concordance.dta
- US_docdb_cipc.dta
- ind90.dta
- aggregate weights DD.xlsx , by hand

Output:
- patents_ind6090_mfg.dta

*/


* -----------------------------------
* A.Import sic4-ind90 concordance
* -----------------------------------

* This has the "original" sic4's
*by hand, based on U.S. Census Bureau. 2003. "The Relationship Between The 1990 Census and Census 2000 Industry and Occupation Classification Systems." Technical Paper #65, U.S. Department of Commerce.
use ${alm_data_raw}/concordances/ind90_sic/ind90_sic.dta, clear
drop SIC
drop if SIC4digit==""

*7219 is a special case, 
expand 2 if SIC4digit=="7210 exc. part 7219"
sort SIC4digit
quietly by SIC4digit:  gen dup2 = cond(_N==1,0,_n) if SIC4digit=="7210 exc. part 7219" 
replace SIC4digit="7219" if dup2==2
drop dup
*rename
replace SIC4digit="7210 exc. 7219" if SIC4digit=="7210 exc. part 7219" 

*Generate weights - only in very few cases required - otherwise 1:1 mapping
duplicates tag SIC, gen(dup)
replace dup=dup+1
gen w_2 =1/dup 
drop dup Descr

gen temp=strlen(SIC4digit)
replace SIC4digit="0" + SIC4digit if temp==3
drop temp
rename SIC4digit SIC4

label var w_2 "Weight for duplicate SICs"
tempfile temp1
save "`temp1'", replace

* -----------------------------------------------------
* B. MAP IPC4-SIC of mfg with concordance SIC - ind90
* -----------------------------------------------------

*Basically we just have to match at the SIC level. However, those two
*datasets (current_sic and the SIC) are at two very diffrent level of
*aggregation. Thus we have to fix this. 

*First do the simple cases like:
*	1.2.a) Cases where the current_sic and SIC4 already match uniquely
* 	1.2.b) Cases where the current_sic and the SIC4 of one or two level more 
*				aggregated match uniquely
*	1.2.c) Take the average of weights 
*				example: current_sic 700 gets the average weight of SIC4 
*				0710, 0720, 0740, 0750, 0760, 0780

*Then the difficult cases in 1.2.d)

********************************************************************
* 1.2.a) Cases where the current_sic and SIC4 already match uniquely 
********************************************************************

use ${alm_data_proc}/cw_ipc4_current_sic.dta, clear

*Drop the SIC codes which we cannot match at all (also not below) or which are not a proper SIC 87 code
drop if inlist(current_sic, 3576,4888,4955,4991,9995,9997)

*Reshuffle weights such that they sum up again to 1 per ipc4 code
ren w_ipc4_current_sic weight_ipc4_sic_mfg

egen total1=total(weight_ipc4_sic_mfg), by(ipc4) 
replace weight_ipc4_sic_mfg=weight_ipc4_sic_mfg/total1
drop total1

tostring current_sic, gen(current_sic_s)
replace current_sic_s= "0" + current_sic_s if current_sic<1000
rename current_sic_s SIC4

*Match
joinby SIC4 using "`temp1'", unmatched(both)

*Drop if only in using 
drop if _merge==2

*test
tab SIC4 if _merge==3
rename _merge _merge_i
* this is the weight from "temp1" above 
rename w_2 w_2_i
rename ind90 ind90_i

label var w_2_i "Weight for duplicate SICs"
tempfile temp2
save "`temp2'", replace

****************************************************************************
* 1.2.b) Cases where the current_sic and the SIC4 of one or two level more *
****************************************************************************
* aggregated match uniquely
*Import the excel file which shows this matching, aggregation table by hand
import excel ${alm_data_raw}/concordances/aggregate_weights_DD.xlsx, sheet("forcode") firstrow clear

* TODO: I have no idea how and where this excel was made! I just pasted it into the new file structure and made the spcaes underscores

tostring SIC4, replace
tempfile temp3
save "`temp3'", replace

use"`temp2'", clear
merge m:1 SIC4 using "`temp3'"

assert _merge!=2
assert _merge_i==1 if _merge==3
assert _merge==1 if _merge_i==3
rename _merge _merge_temp

tempfile temp4
save "`temp4'", replace

use "`temp1'", clear
rename SIC4 SIC4_ii

tempfile temp5
save "`temp5'", replace

use "`temp4'", clear
*Match with overall data file
joinby  SIC4_ii using "`temp5'", unmatched(both)

*Drop if only in using 
drop if _merge==2

assert _merge==_merge_temp
drop _merge_temp
rename _merge _merge_ii
rename w_2 w_2_ii
rename ind90 ind90_ii

*Combine weight, ind90 and merge in one variable (_final)-> we have to do this because we have to match with the next level of aggregation
*thats also where the weird weight variable names come from
gen w_2_final=w_2_i
replace w_2_final=w_2_ii if _merge_ii==3

gen ind90_final=ind90_i
replace ind90_final=ind90_ii if _merge_ii==3

gen _merge_final=_merge_i
replace _merge_final=_merge_ii if _merge_ii==3

drop SIC4_ii 

label var w_2_final "Corrected Employment weight for SIC4 aggregation"
label var weight_ipc4_sic_mfg "original employment weight for SIC4"
label var _merge_i "merge for SIC4 to current sic"
label var _merge_ii "merge for SIC4 to SIC4 aggregation"
label var _merge_final "final merge, for sic aggregation"
label var ind90_final "ind90 corrected for aggregation of current sic and sic4"

tempfile temp6
save "`temp6'", replace

************************************************************
*1.2 .c) Take the average of weights                       *
************************************************************

use "`temp1'", clear
sort SIC4

*Generate the right level of aggregation
gen SIC4_3=substr(SIC4, 1, 3)
replace SIC4_3 = SIC4_3 +"0"

gen SIC4_2=substr(SIC4, 1, 2)
replace SIC4_2 = SIC4_2 +"00"

gen SIC4_iii=""

*these are the codes we need to average over, SIC4_iii is the toplevel code for the subclasses that we want to aggregate on
foreach x in 0700 4100 5500 7200 7600 8200{
replace SIC4_iii=SIC4_2 if SIC4_2=="`x'" 
}

foreach x in  3360 5190 5990 {
replace SIC4_iii=SIC4_3 if SIC4_3=="`x'" 
}

drop if SIC4_iii==""
keep w_2 SIC4_iii ind90 SIC4

*Generate weights within the ones we want to average over
duplicates tag SIC4_iii, gen(dup)
replace dup=dup+1
gen temp_w=1/dup
drop dup

*Some temp_w wrong
replace temp_w=5/6 if SIC4=="4100 exc. 4120"
replace temp_w=1/6 if SIC4=="4120"
replace temp_w=1/7 if SIC4_iii=="7200"
replace temp_w=(1/7)*(1/8) if SIC4=="7219"
replace temp_w=(1/7)*(7/8) if SIC4=="7210 exc. 7219"
replace temp_w=1/4 if SIC4_iii=="7600"
replace temp_w=(1/4)*(1/3) if inlist(SIC4,"7692","7694","7699")

*distribute the previous weights to the temporary ones and get average
gen temp_w2=w_2*temp_w
egen w_2_iii=total(temp_w2), by(ind90 SIC4_iii)
drop temp_w temp_w2 w_2
duplicates drop ind90 SIC4_iii, force
egen test=total(w_2_iii), by(SIC4_iii)
assert test==1
drop test SIC4

rename SIC4_iii SIC4
rename ind90 ind90_iii

tempfile temp7
save "`temp7'", replace


*Match with overall data
use "`temp6'", clear
joinby SIC4 using "`temp7'", unmatched(both)
assert _merge!=2

rename _merge _merge_iii

*assign to the one variable (_final)
replace w_2_final=w_2_iii if _merge_iii==3
replace ind90_final=ind90_iii if _merge_iii==3
replace _merge_final=_merge_iii if _merge_iii==3

label var _merge_iii "was averaged over subclasses"
label var w_2_iii "Average weight for sic aggregated over sublcasses"

tempfile temp8
save "`temp8'", replace

************************
*1.2.e) Difficult cases*
************************

*In this cases we have to generate a weighted weight which depends on how
*many machinery patents are assigned per IPC and current_sic code and which SIC 
*subcodes occur per IPC and curren_sic codes.
*example: 1000
*If 2000 occurs alone for a specific IPC code - weight is average of subcodes 
*(so 2010,2020,2030,2040,2050,2060,2070,2080,2090)
*If 2000 occurs with 2010 and 2030 for a specific IPC code - weight is weighted
*average of w2010 and w2030 mapping - weighted by how many machinery patents are assigned from 
*this IPC code to the 2010 and 2030 industry 
*etc.

*There are two difficult cases
*I) normal one
*II) 2 level ones  

*I) First do the normal ones 
*Only keep those special cases with their subcodes

gen keep=0
replace keep=1 if inlist(SIC4,"2000","2011","2013","2015","2020","2024","2030")
replace keep=1 if inlist(SIC4,"2033","2040","2050","2052","2060","2070","2080")
replace keep=1 if inlist(SIC4,"2082","2084","2085","2086","2090","2092","2200")
replace keep=1 if inlist(SIC4,"2211","2221","2250","2253","2273","2300","2320")
replace keep=1 if inlist(SIC4,"2330","2340","2390","2400","2421","2430","2452")
replace keep=1 if inlist(SIC4,"2600","2611","2621","2631","2650","2670","2673")
replace keep=1 if inlist(SIC4,"2800","2810","2820","2821","2833","2834","2835")
replace keep=1 if inlist(SIC4,"2836","2840","2842","2844","2851","2860","2870")
replace keep=1 if inlist(SIC4,"2890","2891","3100","3140","3330","3334","3350")
replace keep=1 if inlist(SIC4,"3357","3570","3571","3572","3575","3577","3578")
replace keep=1 if inlist(SIC4,"3579","3600","3612","3613","3620","3621","3630")
replace keep=1 if inlist(SIC4,"3634","3640","3651","3652","3661","3663","3669")
replace keep=1 if inlist(SIC4,"3670","3672","3674","3677","3678","3679","3690")
replace keep=1 if inlist(SIC4,"3695","5090","5093")
replace keep=1 if inlist(SIC4,"5094","5099","5200","5211","5700","5712","5731")
replace keep=1 if inlist(SIC4,"5735","5940","5944","5960","5961","7380","7381")
replace keep=1 if inlist(SIC4,"7389","7500","7510","8700","8711","8721","8731")
replace keep=1 if inlist(SIC4,"8734","8742")
keep if keep==1
drop keep 

sort ipc4 SIC4

*standardize the codes
gen SIC4_2=substr(SIC4, 1, 2)
replace SIC4_2= SIC4_2 + "00"

gen SIC4_3=substr(SIC4, 1, 3)
replace SIC4_3= SIC4_3 + "0"

gen agg_level=""

*Assign right level of aggregation
foreach x in 2000 2200 2300 2400 2600 2800 3100 3600 5200 5700 7500 8700{
replace agg_level=SIC4_2 if SIC4_2=="`x'"
}

foreach x in  3330 3350 3570 5090 5940 5960 7380  {
replace agg_level=SIC4_3 if SIC4_3=="`x'"
}

duplicates tag agg_level ipc4, gen(temp)
replace temp=temp+1
gen dup=1 if temp==1 & agg_level==SIC4
drop SIC4_2 SIC4_3 temp

tempfile temp9
save "`temp9'", replace

*If dup==1 then we cannot build a weighted weight (because the code occurs alone)
*and thus we simply assign the average of the weights at one aggregation level
*less - create those weights in a separate dataset and match them 
use "`temp1'", clear
sort SIC4

gen SIC4_3=substr(SIC4, 1,3)
replace SIC4_3=SIC4_3 + "0"

gen SIC4_2=substr(SIC4, 1,2)
replace SIC4_2=SIC4_2 + "00"

gen agg_level=""
foreach x in 2000 2200 2300 2400 2600 2800 3100 3600 5200 5700 7500 8700{
    replace agg_level=SIC4_2 if SIC4_2=="`x'"

}

foreach x in 3330 3350 3570 5090 5940 5960 7380{
    replace agg_level=SIC4_3 if SIC4_3=="`x'"
}

keep if agg_level!=""
drop SIC4_3 SIC4_2


*Generate average weights
duplicates tag agg_level, gen(dup)
replace dup=dup+1
gen temp_w=1/dup
drop dup


*Some temp_w wrong
replace temp_w=4/5 if SIC4=="5090 exc. 5093"
replace temp_w=1/5 if SIC4=="5093"
replace temp_w=1/3 if agg_level=="5700"
replace temp_w=(1/3)*(1/4) if inlist(SIC4, "5731","5734","5735","5736")
replace temp_w=1/4 if agg_level=="7500"
replace temp_w=(1/4)*(1/2) if inlist(SIC4, "7542","7549")
replace temp_w=1/5 if agg_level=="3350"

gen temp_w2=w_2*temp_w
egen w_2_v=total(temp_w2), by(ind90 agg_level)

drop temp_w temp_w2 w_2
duplicates drop ind90 agg_level, force
egen test=total(w_2_v), by(agg_level)
assert test==1
drop test SIC4

*Match with temp9 file
gen dup=1
tempfile temp10
save "`temp10'", replace

use "`temp9'", clear
joinby agg_level dup using "`temp10'" , unmatched(both)

assert _merge!=2
assert _merge==3 if dup==1
assert _merge==1 if dup!=1
rename _merge _merge_v
rename ind90 ind90_v



sort ipc4 agg_level SIC4

*Drop the cases where less agg SIC code occurs but one we would have to aggregate
*so in ipc4 A01B 2821 occurs but 2820 not - so we can drop 2821
gen keep=1 if agg_level==SIC4
egen sum_keep=total(keep), by(agg_level ipc4)
assert _merge_final==3 if dup==0 
drop if sum_keep==0
drop sum_keep


*Generate weighted weight of w_2 for agg_level where dup!=1
gen temp0=w_2_final*weight_ipc4_sic_mfg
egen total1=sum(temp0) if dup!=1 & keep!=1, by(ipc4 agg_level)
gen temp1=temp0/total1 if dup!=1 & keep!=1
egen w_2_vi=total(temp1), by(ipc4 agg_level ind90_final) 
replace w_2_vi=. if dup==1 & keep==1
replace w_2_vi=. if keep==1 

duplicates drop agg_level ind90_final ipc4 if dup!=1 & keep!=1 , force
drop total1 temp0 temp1
drop if keep==1 & dup!=1 & w_2_vi==. 

assert w_2_v==. if w_2_vi!=. 
assert w_2_vi==. if w_2_v!=. 

gen w_2_vii=w_2_vi
replace w_2_vii=w_2_v if keep==1 & dup==1
assert w_2_vii!=.

gen ind90_vii=ind90_final
replace ind90_vii= ind90_v if keep==1 & dup==1
assert ind90_vii!=.

keep ipc4 agg_level w_2_vii ind90_vii
rename agg_level SIC4

egen total=total(w_2_vii), by(ipc4 SIC4)
assert total>0.99999 & total<1.00001
drop total

label var w_2_vii "Corrected Employment weight for SIC4 aggregation"
label var ind90_vii "ind90 corrected for aggregation of current sic and sic4 "

tempfile temp11
save "`temp11'", replace

*match with the big datafile
use "`temp8'", clear

joinby ipc4 SIC4 using "`temp11'", unmatched(both)

rename _merge _merge_vii
assert _merge_final==1 if _merge_vii==3

*assign to the one variable (_final)
replace w_2_final=w_2_vii if _merge_vii==3
replace ind90_final=ind90_vii if _merge_vii==3
replace _merge_final=_merge_vii if _merge_vii==3

tempfile temp77
save "`temp77'", replace

*II) Now the 2 level ones 
*Only keep the special cases with their subcodes
gen keep=0
replace keep=1 if inlist(SIC4,"5900","5912","5940","5944","5960","5961","5990")
replace keep=1 if inlist(SIC4,"5000","5010","5013","5020","5030","5031")
replace keep=1 if inlist(SIC4,"5040","5045","5047","5051","5063","5064","5065")
replace keep=1 if inlist(SIC4,"5070","5072","5080","5082","5084","5090","5093")
replace keep=1 if inlist(SIC4,"5094","5099")


keep if keep==1
drop keep 
sort ipc4 SIC4

*Assign right level of aggregation
gen agg_level=substr(SIC4, 1, 2)
replace agg_level= agg_level + "00"

duplicates tag agg_level ipc4, gen(temp)
replace temp=temp+1
gen dup=1 if temp==1 & agg_level==SIC4
drop temp


*If dup==1 then we cannot build a weighted weight (because the code occurs alone)
*and thus we simply assign the average of the weights at one aggregation level
*less - create those weights in a separate dataset and match them 
tempfile temp12
save "`temp12'", replace

use "`temp1'", clear

gen agg_level=substr(SIC4, 1,2)
replace agg_level=agg_level + "00"

gen keep=0
foreach x in 5900 5000{
replace keep=1 if agg_level=="`x'"
}

drop if keep==0
drop keep

gen temp=SIC4
replace temp="0" if SIC4=="5090 exc. 5093"
destring temp, replace

*Generate average weight
gen w_2_temp=(1/7) if agg_level=="5900"
replace w_2_temp=(1/7)*(1/9) if temp>5940 & temp<5950
replace w_2_temp=(1/7)*(1/3) if temp>5960 & temp<5970
replace w_2_temp=(1/7)*(1/5) if temp>5990 & temp<6000
replace w_2_temp=(1/9) if agg_level=="5000"
replace w_2_temp=(1/9)*(4/5) if SIC4=="5090 exc. 5093"
replace w_2_temp=(1/9)*(1/5) if SIC4=="5093"

egen w_2_viii=total(w_2_temp),by(ind90 agg_level)
duplicates drop ind90 agg_level, force

keep ind90 agg_level w_2_viii
egen total=total(w_2_viii), by(agg_level)
assert total==1
drop total

*Match with the temp12 data
gen dup=1
tempfile temp13
save "`temp13'", replace
use "`temp12'",clear

joinby agg_level dup using "`temp13'" , unmatched(both)

assert _merge!=2
assert _merge==3 if dup==1
assert _merge==1 if dup!=1
rename _merge _merge_viii
rename ind90 ind90_viii



sort ipc4 agg_level SIC4

*Drop the cases where less agg SIC code occurs but one we would have to aggregate not
*so in ipc4 A01B 2821 occurs but 2820 not - so we can drop 2821
gen keep=1 if agg_level==SIC4
egen sum_keep=total(keep), by(agg_level ipc4)
assert _merge_final==3 if dup==0 
drop if sum_keep==0
drop sum_keep

*Generate weighted weight for agg_level where dup!=1
*use weight from one level below only (so if 2250 and 2253 occurs in 2200 only
*use weight from 2250 but the machinery patent counts (the weight_ipc4_sic_mfg) from 2250 and 2253 aggregated)
*like this it is similar to the average of average but with weight (is often the
*same as when we use weights and patent counts from each level (so from 2250 and
*2253) because 2250 often same weight as 2251). 


*First aggregate some patent counts by 3-digit level
gen agg_level_1=substr(SIC4, 1,3)
replace agg_level_1=agg_level_1 + "0"

*Assign patent counts to higher level (3 digit code) if existent 
gen tag=1 if SIC4==agg_level_1 & keep!=1
egen sum_tag=total(tag), by(agg_level_1 ipc4)
gen zero= strrpos(SIC4, "0")
gen digit_4=1 if inlist(zero, 2,0)
drop zero
gen digit_3=1 if inlist(SIC4,"5010","5020","5030","5040","5070","5080")
replace digit_3=1 if inlist(SIC4,"5090","5940","5960","5990")
gen digit_2=1 if inlist(SIC4, "5000", "5900")
gen digit=.
replace digit=4 if digit_4==1
replace digit=3 if digit_3==1
replace digit=2 if digit_2==1
drop digit_*

sort ipc4 agg_level SIC4
egen total=total(digit) if keep!=1 & sum_tag!=0, by(ipc4 agg_level_1)

*sum up the weights again
egen total_w=total(weight_ipc4_sic_mfg) if inlist(total, 7,11,14), by(agg_level_1 ipc4 ind90_final)
egen test=total(weight_ipc4_sic_mfg), by(agg_level_1 ipc4)

*Drop the lower level 
drop if digit==4 & inlist(total, 7,11,14) & tag==.
replace weight_ipc4_sic_mfg=total_w if tag==1 & inlist(total, 7,11,14)
drop total total_w digit

egen test2=total(weight_ipc4_sic_mfg), by(agg_level_1 ipc4)
drop test2 test sum_tag tag

gen temp0=w_2_final*weight_ipc4_sic_mfg
egen total1=sum(temp0) if dup!=1 & keep!=1, by(ipc4 agg_level)
gen temp1=temp0/total1 if dup!=1 & keep!=1
egen w_2_ix=total(temp1), by(ipc4 agg_level ind90_final) 

replace w_2_ix=. if dup==1 & keep==1
replace w_2_ix=. if keep==1 

duplicates drop agg_level ind90_final ipc4 if dup!=1 & keep!=1 , force
drop temp0 total1 temp1

drop if keep==1 & dup!=1 & w_2_ix==. 

assert w_2_ix==. if w_2_viii!=. 
assert w_2_viii==. if w_2_ix!=. 

gen w_2_x=w_2_ix
replace w_2_x=w_2_viii if keep==1 & dup==1
assert w_2_x!=.

gen ind90_x=ind90_final
replace ind90_x= ind90_viii if keep==1 & dup==1
assert ind90_x!=.


keep ipc4 agg_level w_2_x ind90_x
rename agg_level SIC4

*again, collect weights and checck if plausible, allow for rounding errors
egen total=total(w_2_x), by(ipc4 SIC)
assert total>0.9999 & total<1.00001
drop total

tempfile temp14
save "`temp14'", replace

*match with the big datafile
use "`temp77'", clear
joinby ipc4 SIC4 using "`temp14'", unmatched(both)

assert _merge!=2
rename _merge _merge_x
assert _merge_final==1 if _merge_x==3

*assign to the one variable (_final)
replace w_2_final=w_2_x if _merge_x==3
replace ind90_final=ind90_x if _merge_x==3
replace _merge_final=_merge_x if _merge_x==3

assert _merge_final==3
*finished - we have a w_2 weight for all current_sic codes (aka SIC4)
egen test=rowtotal(_merge*)
assert test==10
drop test

*keep only essential variables
keep ipc4 weight_ipc4_sic_mfg w_2_final ind90_final  SIC4

*****************************************
* 2. COMPLETE MAPPING IPC4 INTO IND90   *
*****************************************

*multiply the weights (emplyoment with faction of ipc code patents)
gen temp= weight_ipc4_sic_mfg *w_2_final
*sum up by technology and industry of use
egen weight_ipc4_sic_mfg_agg  = total(temp), by(ipc4 ind90_final)

duplicates drop ipc4 ind90_final, force
*test
egen total=total(weight_ipc4_sic_mfg_agg), by(ipc4)
assert total>0.99999 & total<1.00001
*ok equal to 1
drop total

keep ipc4 ind90_final weight_ipc4_sic_mfg_agg
rename ind90_final ind90
sort ipc4 ind90

* ------------------------------------------------
* D. Map into ind690 industry classification, save
* ------------------------------------------------
*Autor, David H. "The Skill Content of Recent Technological Change: An Empirical Exploration" 
*Quarterly Journal of Economics, 118(4), November 2003, 1279-1334. Accessed November 2018. https://economics.mit.edu/people/faculty/david-h-autor/data-archive.
joinby ind90 using ${alm_data_raw}/concordances/ind90_sic/ind90.dta, unmatched(both)
drop ind7090 ind8090 dind8090 dind7090 mind8090 mind7090 
drop if _merge==2
assert _merge==3
drop _merge

ren weight_ipc4_sic_mfg_agg w_ipc4_ind6090_mk
keep ipc4 w_ipc4_ind6090_mk ind6090
save ${alm_data_proc}/cw_ipc4_ind6090_mk.dta, replace

* ---------------------------------------------
* E. Apply sic4-ind6090 cw to get ipc4-sic4_mk
* ---------------------------------------------

mmerge ind6090 using ${alm_data_proc}/cw_sic4_ind6090.dta, unmatched(master)
assert _m == 3
drop _m
gen w_ipc4_sic4_mk = w_ipc4_ind6090_mk * weight
ren sic4 sic4_mk
keep ipc4 sic4_mk w_ipc4_sic4_mk
duplicates drop
save ${alm_data_proc}/cw_ipc4_sic4_mk.dta, replace


}
if _rc == 0 {
    display "Execution finished successfully."
}
else {
    display "Execution finished with errors."
}

cap log close dat